#################################################################################
########################## Lab6 Prophet ###############################
############## -------------- AirQuality ------------- #######################
#################################################################################
# Load necessary libraries
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.5.1 ✔ fma 2.5
## ✔ forecast 8.22.0 ✔ expsmooth 2.3
##
library(prophet) # https://facebook.github.io/prophet/docs/quick_start.html
## Loading required package: Rcpp
## Loading required package: rlang
library(MLTools)
## __ __ _ _________
## | \ / | | | |___ ___| _____ _____ _ _____
## | \/ | | | ____ | | | _ | | _ | | | | ___|
## | |\ /| | | | |____| | | | | | | | | | | | | |___ |
## | | \/ | | | |____ | | | |_| | | |_| | | |__ ___| |
## |_| |_| |______| |_| |_____| |_____| |____| |_____|
##
## Learning is fun, Machine Learning is funnier
## -----------------------------------------
## With great power comes great responsibility
## -----------------------------------------
## Created by: Jos<e9> Portela Gonz<e1>lez <Jose.Portela@iit.comillas.edu>
## Guillermo Mestre Marcos <Guillermo.Mestre@comillas.edu> Jaime Pizarroso Gonzalo
## <jpizarroso@comillas.edu> Antonio Mu<f1>oz San Roque
## <antonio.munoz@iit.comillas.edu>
##
## Escuela T<e9>cnica Superior de Ingenier<ed>a ICAI
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr) # For prophet_df manipulation
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Set working directory -------------------------------------------------------
#setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# 0. Load data -----------------------------------------------------------------
fdata <- read.table("prophet_data.csv", sep = ";", header = TRUE)
head(fdata)
## date output input1 input2
## 1 2015-01-01 0.3947508 0.01325662 7.713629
## 2 2015-01-02 0.7071801 0.01314552 7.308481
## 3 2015-01-03 -0.7920484 0.01264853 7.474993
## 4 2015-01-04 -0.3877153 0.01215471 6.913461
## 5 2015-01-05 -2.4661411 0.01166405 7.110118
## 6 2015-01-06 -2.3676671 0.01117658 6.310578
fdata$date <- as.Date(fdata$date)
# Order and Check for missing dates
fdata <- fdata |> arrange(date)
date_range <- seq(min(fdata$date), max(fdata$date), by = "day")
date_range[!date_range %in% fdata$date]
## Date of length 0
sum(is.na(fdata$output))
## [1] 0
# Check for duplicates
length(fdata$date)
## [1] 2922
length(unique(fdata$date))
## [1] 2922
ggtsdisplay(fdata$output, lag.max=100)

# Leave a gap of 182 days for testing
train_data <- fdata[1:(nrow(fdata) - 182), ]
test_data <- fdata[(nrow(fdata) - 182 + 1):nrow(fdata), ]
prophet_df <- data.frame(
ds = train_data$date, # Dates must be named ds
# y = train_data$output # Output must be named y
y = train_data$output, # Output must be named y
x1 = train_data$input1, # Input can be named whatever, x1 for convenience
x2 = train_data$input2 # Input can be named whatever, x2 for convenience
)
# 1. Model without seasonality -------------------------------------------------
model1 <- prophet(
yearly.seasonality = FALSE,
weekly.seasonality = FALSE,
daily.seasonality = FALSE
) %>% fit.prophet(prophet_df)
# Plot the results
predictions <- predict(model1, prophet_df)
p <- plot(model1, predictions) + add_changepoints_to_plot(model1)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model1, predictions)

# Check residuals
residuals_model_1 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_1, 36)

# 2. Model with custom changepoints --------------------------------------------
changepoints <- as.Date(c("2017-01-01", "2019-06-01", "2022-01-01"))
model2 <- prophet(
yearly.seasonality = FALSE,
weekly.seasonality = FALSE,
daily.seasonality = FALSE,
changepoints = changepoints
) %>% fit.prophet(prophet_df)
# Plot the results
predictions <- predict(model2, prophet_df)
p <- plot(model2, predictions) + add_changepoints_to_plot(model2)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model2, predictions)

# Check residuals
residuals_model_2 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_2, 36)

# 3. Model with seasonality (yearly and weekly) --------------------------------
model3 <- prophet(
# model3 <- prophet(mcmc.samples = 4000,
yearly.seasonality = TRUE,
weekly.seasonality = TRUE,
daily.seasonality = FALSE,
changepoints = changepoints
) %>% fit.prophet(prophet_df)
# Plot the results
predictions <- predict(model3, prophet_df)
p <- plot(model3, predictions) + add_changepoints_to_plot(model3)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model3, predictions)

# Check residuals
residuals_model_3 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_3, 36)

# 4. Model with custom seasonality (monthly) -----------------------------------
model4 <- prophet(
yearly.seasonality = TRUE,
weekly.seasonality = TRUE,
daily.seasonality = FALSE,
changepoints = changepoints
) %>% add_seasonality(
name = 'monthly',
period = 30.5,
fourier.order = 3
) %>% fit.prophet(prophet_df)
# Plot the results
predictions <- predict(model4, prophet_df)
p <- plot(model4, predictions)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model4, predictions)

# Check residuals
residuals_model_4 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_4, 36)

# 5. Model with external regressors --------------------------------------------
ggtsdisplay(fdata$input1, lag.max=36)

ggtsdisplay(fdata$input2, lag.max=36)

# Add regressors to the prophet_df frame
model5 <- prophet(
yearly.seasonality = TRUE,
weekly.seasonality = TRUE,
daily.seasonality = FALSE,
changepoints = changepoints
) %>% add_seasonality(
name = 'monthly',
period = 30.5,
fourier.order = 5
) %>% add_regressor(
'x1'
) %>% add_regressor(
'x2'
) %>% fit.prophet(prophet_df)
# Plot the results
predictions <- predict(model5, prophet_df)
p <- plot(model5, predictions)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model5, predictions)

# Check residuals
residuals_model_5 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_5, 36)

outliers_model_5_indx <- which(abs(residuals_model_5) > (mean(residuals_model_5) + 3 * sd(residuals_model_5)))
print(fdata$date[outliers_model_5_indx])
## [1] "2015-12-27" "2015-12-29" "2017-06-09" "2018-03-25" "2018-12-25"
## [6] "2018-12-27" "2018-12-28" "2019-02-23" "2019-06-17" "2019-12-25"
## [11] "2019-12-29" "2020-12-21" "2020-12-28"
# 6. Model with holidays -------------------------------------------------------
# Create synthetic holidays
holidays <- data.frame(
holiday = 'christmas',
ds = seq.Date(from = as.Date("2015-12-24"), to = as.Date("2022-12-24"), by = "year"),
lower_window = 0, # Holidays affect lower_window previous days
upper_window = 5 # Holidays affect upper_window next days
)
model6 <- prophet(
yearly.seasonality = TRUE,
weekly.seasonality = TRUE,
daily.seasonality = FALSE,
changepoints = changepoints,
holidays = holidays
) %>% add_seasonality(
name = 'monthly',
period = 30.5,
fourier.order = 5
) %>% add_regressor(
'x1'
) %>% add_regressor(
'x2'
) %>% fit.prophet(prophet_df)
# Plot the results
predictions <- predict(model6, prophet_df)
p <- plot(model6, predictions)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model6, predictions)

# Check residuals
residuals_model_6 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_6, 36)

# 7. Model with adjusted hyperparameters ---------------------------------------
# Manually set hyperparameters
model7 <- prophet(
yearly.seasonality = 3,
weekly.seasonality = 3,
daily.seasonality = FALSE,
changepoints = changepoints,
holidays = holidays
) %>% add_seasonality(
name = 'monthly',
period = 30.5,
fourier.order = 5
) %>% add_regressor(
'x1'
) %>% add_regressor(
'x2'
) %>% fit.prophet(prophet_df)
# Plot the results
predictions <- predict(model7, prophet_df)
p <- plot(model7, predictions) + add_changepoints_to_plot(model7)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model7, predictions)

# Check residuals
residuals_model_7 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_7, 36)

# How can we optimize the hyperparameter values?
# 8. Model with cross-validation and hyperparameter tuning ---------------------
# Define hyperparameter grid
param_grid <- expand.grid(
yearly_fourier_order = c(1, 3, 5),
weekly_fourier_order = c(1, 3, 5),
monthly_fourier_order = c(1, 3, 5)
)
# Initialize a data frame to store results
cv_results <- data.frame()
# Loop over each combination of hyperparameters
for (i in 1:nrow(param_grid)) {
params <- param_grid[i, ]
temp_model <- prophet(
yearly.seasonality = params$yearly_fourier_order,
weekly.seasonality = params$weekly_fourier_order,
daily.seasonality = FALSE,
changepoints = changepoints,
holidays = holidays
) %>% add_seasonality(
name = 'monthly',
period = 30.5,
fourier.order = params$monthly_fourier_order
) %>% add_regressor(
'x1'
) %>% add_regressor(
'x2'
) %>% fit.prophet(prophet_df)
# Perform cross-validation
cv <- cross_validation(
temp_model,
initial = 365 * 2, # First 2 years for training
period = 180, # Retrain every 6 months
horizon = 365, # Forecast horizon of 1 year
units = 'days'
)
# Compute performance metrics
perf <- performance_metrics(cv)
# Store the results
cv_results <- rbind(
cv_results,
data.frame(
yearly_fourier_order = params$yearly_fourier_order,
weekly_fourier_order = params$weekly_fourier_order,
monthly_fourier_order = params$monthly_fourier_order,
MAPE = perf$mape[1],
RMSE = perf$rmse[1]
)
)
}
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Making 10 forecasts with cutoffs between 2017-01-24 and 2021-07-02
## Warning in .local(object, ...): non-zero return code in optimizing
## Optimization terminated abnormally. Falling back to Newton optimizer.
print(cv_results)
## yearly_fourier_order weekly_fourier_order monthly_fourier_order MAPE
## 1 1 1 1 9.810883
## 2 3 1 1 7.613038
## 3 5 1 1 8.015353
## 4 1 3 1 9.810883
## 5 3 3 1 7.613038
## 6 5 3 1 8.015353
## 7 1 5 1 9.818374
## 8 3 5 1 7.610579
## 9 5 5 1 8.042792
## 10 1 1 3 10.151347
## 11 3 1 3 7.790303
## 12 5 1 3 8.250467
## 13 1 3 3 10.151347
## 14 3 3 3 7.790303
## 15 5 3 3 8.250467
## 16 1 5 3 10.144985
## 17 3 5 3 7.803658
## 18 5 5 3 8.229918
## 19 1 1 5 10.415708
## 20 3 1 5 8.077234
## 21 5 1 5 8.513002
## 22 1 3 5 10.415708
## 23 3 3 5 8.077234
## 24 5 3 5 8.513002
## 25 1 5 5 10.400745
## 26 3 5 5 8.080000
## 27 5 5 5 8.510549
## RMSE
## 1 1.661312
## 2 1.636427
## 3 1.649546
## 4 1.661312
## 5 1.636427
## 6 1.649546
## 7 1.663086
## 8 1.636987
## 9 1.649173
## 10 1.674417
## 11 1.647289
## 12 1.661413
## 13 1.674417
## 14 1.647289
## 15 1.661413
## 16 1.673142
## 17 1.649093
## 18 1.659843
## 19 1.678496
## 20 1.651586
## 21 1.665529
## 22 1.678496
## 23 1.651586
## 24 1.665529
## 25 1.679063
## 26 1.652705
## 27 1.665236
# Find the best hyperparameters based on RMSE
best_params <- cv_results %>% filter(MAPE == min(MAPE))
print("Best Hyperparameters:")
## [1] "Best Hyperparameters:"
print(best_params)
## yearly_fourier_order weekly_fourier_order monthly_fourier_order MAPE
## 1 3 5 1 7.610579
## RMSE
## 1 1.636987
# Final model with best hyperparameters
model8 <- prophet(
yearly.seasonality = best_params$yearly_fourier_order[1],
weekly.seasonality = best_params$weekly_fourier_order[1],
daily.seasonality = FALSE,
changepoints = changepoints,
holidays = holidays
) %>% add_seasonality(
name = 'monthly',
period = 30.5,
fourier.order = best_params$monthly_fourier_order[1]
) %>% add_regressor(
'x1'
) %>% add_regressor(
'x2'
) %>% fit.prophet(prophet_df)
# Plot the results
predictions <- predict(model8, prophet_df)
p <- plot(model8, predictions)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model8, predictions)

# Check residuals
residuals_model_8 <- prophet_df$y - predictions$yhat
CheckResiduals.ICAI(residuals_model_8, 36)

### Predicting new data --------------------------------------------------------
# Make future predictions, including regressors
prophet_df_test <- make_future_dataframe(
model8,
periods = 182, # Number of samples to forecast
freq = 'days', # Frequency of samples to forecast
include_history = FALSE # indicate if previous dates should be included
)
prophet_df_test$x1 <- test_data$input1
prophet_df_test$x2 <- test_data$input2
# Plot the results
predictions_test <- predict(model8, prophet_df_test)
p <- plot(model8, predictions_test) +
geom_point(data=test_data, aes(x = prophet_df_test$ds, y = output),
color = "red", alpha = 0.5)
ggplotly(p)
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
## Warning: Aspect ratios aren't yet implemented, but you can manually set a
## suitable height/width
# Plot prophet components
prophet_plot_components(model8, predictions_test)

residuals_test <- test_data$output - predictions_test$yhat
CheckResiduals.ICAI(residuals_test, 36)

# Evaluate the model
accuracy(predictions$yhat, train_data$output)
## ME RMSE MAE MPE MAPE
## Test set 1.551349e-05 1.52606 1.214273 133.7258 217.6792
accuracy(predictions_test$yhat, test_data$output)
## ME RMSE MAE MPE MAPE
## Test set 0.3384664 1.649087 1.344193 1.274717 8.551801
### Check more features at https://facebook.github.io/prophet/docs/quick_start.html